# Read data
# Load the Espresso DESeq2 results table (tab-separated) into a tibble.
# `wd` is concatenated directly with the relative path, so it must already
# end with a path separator.
espresso_deseq2 <- read_tsv(
  file = paste0(
    wd,
    "Tables/Espresso/espresso_deseq2_genetype2_isDET_2024-04-01.tsv"
  )
)
## Rows: 36717 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (11): transcript_id, transcript_type, transcript_name, gene_id, gene_typ...
## dbl (18): siMETTL2A_baseMean, siMETTL2A_log2FoldChange, siMETTL2A_lfcSE, siM...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the tibble to check its dimensions and column types.
espresso_deseq2
## # A tibble: 36,717 × 29
## transcript_id transcript_type transcript_name gene_id gene_type gene_name
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 ENST00000498442.1 retained_intron CRBN-212 ENSG00… protein_… CRBN
## 2 ENST00000459840.5 retained_intron CRBN-205 ENSG00… protein_… CRBN
## 3 ENST00000231948.9 protein_coding CRBN-201 ENSG00… protein_… CRBN
## 4 ENST00000432408.6 protein_coding CRBN-203 ENSG00… protein_… CRBN
## 5 ENST00000339437.… protein_coding TRNT1-203 ENSG00… protein_… TRNT1
## 6 ENST00000488263.5 retained_intron CRBN-209 ENSG00… protein_… CRBN
## 7 ENST00000420393.5 protein_coding TRNT1-207 ENSG00… protein_… TRNT1
## 8 ENST00000698415.1 retained_intron TRNT1-230 ENSG00… protein_… TRNT1
## 9 ENST00000450014.1 protein_coding CRBN-204 ENSG00… protein_… CRBN
## 10 ENST00000698416.1 retained_intron TRNT1-231 ENSG00… protein_… TRNT1
## # ℹ 36,707 more rows
## # ℹ 23 more variables: siMETTL2A_baseMean <dbl>,
## # siMETTL2A_log2FoldChange <dbl>, siMETTL2A_lfcSE <dbl>,
## # siMETTL2A_stat <dbl>, siMETTL2A_pvalue <dbl>, siMETTL2A_padj <dbl>,
## # siMETTL2A_I_baseMean <dbl>, siMETTL2A_I_log2FoldChange <dbl>,
## # siMETTL2A_I_lfcSE <dbl>, siMETTL2A_I_stat <dbl>, siMETTL2A_I_pvalue <dbl>,
## # siMETTL2A_I_padj <dbl>, siMETTL2A_G_baseMean <dbl>, …
# Load the gzipped, tab-separated table of significant positions into a
# tibble (one row per transcript position). `wd` is concatenated directly
# with the relative path, so it must already end with a path separator.
methylated_positions <- read_tsv(
  file = paste0(
    wd,
    "Tables/DRS/Positions/common_sig_seqs_in_intensity_up_2024-04-10.tsv.gz"
  )
)
## Rows: 605 Columns: 65
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (30): transcript_id, transcript_name, ref_kmer, GMM_cov_type_G, cluster_...
## dbl (35): position, GMM_logit_pvalue_G, KS_dwell_pvalue_G, KS_intensity_pval...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the tibble to check its dimensions and column types.
methylated_positions
## # A tibble: 605 × 65
## transcript_id transcript_name position ref_kmer GMM_logit_pvalue_G
## <chr> <chr> <dbl> <chr> <dbl>
## 1 ENST00000429711.7 RPL32-204 422 GCCCA 1
## 2 ENST00000647248.2 RPL35A-211 380 ACCCC 1
## 3 ENST00000647248.2 RPL35A-211 381 CCCCT 1
## 4 ENST00000389680.2 MT-RNR1-201 43 ACACA 1
## 5 ENST00000389680.2 MT-RNR1-201 57 CCCCG 1
## 6 ENST00000389680.2 MT-RNR1-201 71 GTTCA 1
## 7 ENST00000389680.2 MT-RNR1-201 73 TCACC 1
## 8 ENST00000389680.2 MT-RNR1-201 75 ACCCT 0.777
## 9 ENST00000389680.2 MT-RNR1-201 93 ATCAA 1
## 10 ENST00000389680.2 MT-RNR1-201 138 GCTTA 1
## # ℹ 595 more rows
## # ℹ 60 more variables: KS_dwell_pvalue_G <dbl>, KS_intensity_pvalue_G <dbl>,
## # GMM_cov_type_G <chr>, GMM_n_clust_G <dbl>, cluster_counts_G <chr>,
## # Logit_LOR_G <dbl>, c1_mean_intensity_G <dbl>, c2_mean_intensity_G <dbl>,
## # c1_median_intensity_G <dbl>, c2_median_intensity_G <dbl>,
## # c1_sd_intensity_G <dbl>, c2_sd_intensity_G <dbl>, c1_mean_dwell_G <dbl>,
## # c2_mean_dwell_G <dbl>, c1_median_dwell_G <dbl>, c2_median_dwell_G <dbl>, …
# Collapse the position-level table to one row per transcript and tag each of
# those transcripts with ismethylated = "+".
methylated_transcripts <-
  methylated_positions |>
  distinct(transcript_id) |>
  mutate(ismethylated = "+")
# Annotate every transcript in the DESeq2 table with its methylation flag:
# "+" when the transcript is listed in `methylated_transcripts`, "-" otherwise.
espresso_deseq2_m3Cinfo <-
  espresso_deseq2 |>
  left_join(
    methylated_transcripts,
    # Spell out the key so the join can never silently pick up additional
    # shared column names (and no "Joining with ..." message is emitted).
    by = join_by(transcript_id),
    # `methylated_transcripts` holds one row per transcript_id, so a match can
    # never duplicate rows of the DESeq2 table; fail loudly if that changes.
    relationship = "many-to-one"
  ) |>
  # Transcripts without a match come back as NA; mark them as unmethylated.
  replace_na(list(ismethylated = "-"))
espresso_deseq2_m3Cinfo
## # A tibble: 36,717 × 30
## transcript_id transcript_type transcript_name gene_id gene_type gene_name
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 ENST00000498442.1 retained_intron CRBN-212 ENSG00… protein_… CRBN
## 2 ENST00000459840.5 retained_intron CRBN-205 ENSG00… protein_… CRBN
## 3 ENST00000231948.9 protein_coding CRBN-201 ENSG00… protein_… CRBN
## 4 ENST00000432408.6 protein_coding CRBN-203 ENSG00… protein_… CRBN
## 5 ENST00000339437.… protein_coding TRNT1-203 ENSG00… protein_… TRNT1
## 6 ENST00000488263.5 retained_intron CRBN-209 ENSG00… protein_… CRBN
## 7 ENST00000420393.5 protein_coding TRNT1-207 ENSG00… protein_… TRNT1
## 8 ENST00000698415.1 retained_intron TRNT1-230 ENSG00… protein_… TRNT1
## 9 ENST00000450014.1 protein_coding CRBN-204 ENSG00… protein_… CRBN
## 10 ENST00000698416.1 retained_intron TRNT1-231 ENSG00… protein_… TRNT1
## # ℹ 36,707 more rows
## # ℹ 24 more variables: siMETTL2A_baseMean <dbl>,
## # siMETTL2A_log2FoldChange <dbl>, siMETTL2A_lfcSE <dbl>,
## # siMETTL2A_stat <dbl>, siMETTL2A_pvalue <dbl>, siMETTL2A_padj <dbl>,
## # siMETTL2A_I_baseMean <dbl>, siMETTL2A_I_log2FoldChange <dbl>,
## # siMETTL2A_I_lfcSE <dbl>, siMETTL2A_I_stat <dbl>, siMETTL2A_I_pvalue <dbl>,
## # siMETTL2A_I_padj <dbl>, siMETTL2A_G_baseMean <dbl>, …
# Tally transcripts for every (ismethylated, common_DETs) combination, then
# express each tally as a percentage of its ismethylated group.
# The result stays grouped by ismethylated.
espresso_deseq2_DET_groupedby_methylation <-
  espresso_deseq2_m3Cinfo |>
  count(ismethylated, common_DETs) |>
  group_by(ismethylated) |>
  mutate(percentage = 100 * n / sum(n))
espresso_deseq2_DET_groupedby_methylation
## # A tibble: 6 × 4
## # Groups: ismethylated [2]
## ismethylated common_DETs n percentage
## <chr> <chr> <int> <dbl>
## 1 + down 1 1.18
## 2 + other 50 58.8
## 3 + up 34 40
## 4 - down 539 1.47
## 5 - other 35689 97.4
## 6 - up 404 1.10
# Tally transcripts for every (ismethylated, genetype2, common_DETs)
# combination, then express each tally as a percentage of its
# (ismethylated, genetype2) group. The result stays grouped by those two keys.
espresso_deseq2_DET_groupedby_methylation_genetypes <-
  espresso_deseq2_m3Cinfo |>
  count(ismethylated, genetype2, common_DETs) |>
  group_by(ismethylated, genetype2) |>
  mutate(percentage = 100 * n / sum(n))
espresso_deseq2_DET_groupedby_methylation_genetypes
## # A tibble: 18 × 5
## # Groups: ismethylated, genetype2 [9]
## ismethylated genetype2 common_DETs n percentage
## <chr> <chr> <chr> <int> <dbl>
## 1 + mRNA down 1 1.39
## 2 + mRNA other 48 66.7
## 3 + mRNA up 23 31.9
## 4 + mt-mRNA up 9 100
## 5 + mt-rRNA up 2 100
## 6 + unannotated gene other 2 100
## 7 - mRNA down 526 1.69
## 8 - mRNA other 30186 97.1
## 9 - mRNA up 378 1.22
## 10 - mt-mRNA other 2 50
## 11 - mt-mRNA up 2 50
## 12 - mt-tRNA other 7 100
## 13 - other ncRNAs down 11 0.238
## 14 - other ncRNAs other 4590 99.4
## 15 - other ncRNAs up 16 0.347
## 16 - unannotated gene down 2 0.219
## 17 - unannotated gene other 904 98.9
## 18 - unannotated gene up 8 0.875
# Horizontal stacked bars, one per methylation flag, showing the proportion of
# transcripts in each common_DETs class (position_fill() rescales the counts
# in `n` to proportions within each bar).
espresso_deseq2_DET_groupedby_methylation_barplot <-
  espresso_deseq2_DET_groupedby_methylation |>
  ggplot(aes(x = ismethylated, y = n, fill = common_DETs)) +
  geom_col(position = position_fill()) +
  coord_flip() +
  # Colours are keyed by category name so that a missing or additional
  # common_DETs class can never shift which colour each class receives.
  scale_fill_manual(
    values = c(down = "#3e3ef2", other = "grey", up = "#f23e3e")
  ) +
  # Reverse the proportion axis (drawn horizontally after coord_flip()).
  scale_y_reverse()
# `ggsave_multiple_formats()` is a project helper defined elsewhere; the plot
# object is passed by its variable name in case the helper derives the output
# file name from it (TODO confirm).
espresso_deseq2_DET_groupedby_methylation_barplot |>
  ggsave_multiple_formats(
    outdir = figdir, width = 5, height = 3.5, fontsize = 7
  )

# Horizontal stacked bars, one per (methylation flag, gene type) combination,
# showing the proportion of transcripts in each common_DETs class
# (position_fill() rescales the counts in `n` to proportions within each bar).
espresso_deseq2_DET_groupedby_methylation_genetypes_barplot <-
  espresso_deseq2_DET_groupedby_methylation_genetypes |>
  ggplot(aes(
    # interaction() joins the two reversed factors with "." between the
    # labels; the nested axis guide below splits the labels on that delimiter.
    x = interaction(ismethylated |> fct_rev(), genetype2 |> fct_rev()),
    y = n,
    fill = common_DETs
  )) +
  geom_col(position = position_fill()) +
  scale_x_discrete(guide = ggh4x::guide_axis_nested(delim = ".")) +
  coord_flip() +
  # Colours are keyed by category name so that a missing or additional
  # common_DETs class can never shift which colour each class receives.
  scale_fill_manual(
    values = c(down = "#3e3ef2", other = "grey", up = "#f23e3e")
  ) +
  # Reverse the proportion axis (drawn horizontally after coord_flip()).
  scale_y_reverse()
# `ggsave_multiple_formats()` is a project helper defined elsewhere; the plot
# object is passed by its variable name in case the helper derives the output
# file name from it (TODO confirm).
espresso_deseq2_DET_groupedby_methylation_genetypes_barplot |>
  ggsave_multiple_formats(
    outdir = figdir, width = 6, height = 5, fontsize = 7
  )
## Warning: The S3 guide system was deprecated in ggplot2 3.5.0.
## ℹ It has been replaced by a ggproto system that can be extended.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
